Preparation
Library
reticulate::use_condaenv("/Users/pixushi/miniconda3/envs/qiime2-2021.2")
rm(list=ls())
# for data
library(readr) # read tsv
library(qiime2R) # read in Qiime artifacts
library(dplyr) # data formatting
library(yaml) # for read_qza() in qiime2R
library(tidyr)
# for computing
library(reticulate) # run py codes
library(phyloseq) # phyloseq object
library(vegan) # distance matrix
library(PERMANOVA) # permanova
library(randomForest) # random forest
library(PRROC) # roc and pr
library(tempted)
## Warning in .recacheSubclasses(def@className, def, env): undefined subclass
## "ndiMatrix" of class "replValueSp"; definition not updated
library(microTensor)
# for plotting
library(ggpubr)
library(ggplot2)
library(gridExtra)
library(RColorBrewer)
library(plotly)
# set working directory to be where the current script is located
setwd(dirname(rstudioapi::getSourceEditorContext()$path))
col_group <- c(brewer.pal(6,'Set2')[6], brewer.pal(6,'Set1')[c(3,1)])
col_microbe <- c(brewer.pal(6,'Set1')[c(2,4)], brewer.pal(6,'Set2')[4])
npc <- 3 # number of components
bws <- 8 # for plotting
Plot timeline of study
meta_ordered <- meta_all[order(meta_all$group, meta_all$host_subject_id, decreasing=T),]
meta_ordered$host_subject_id <- factor(meta_ordered$host_subject_id,
levels=unique(meta_ordered$host_subject_id))
colnames(meta_ordered)[22] <- "Group"
p_timeline <- ggplot(data=meta_ordered,
aes(x=week, y=host_subject_id,
group=as.factor(host_subject_id), color=Group, shape=Group)) +
geom_line() + geom_point() + scale_color_manual(values=col_group) +
labs(y="Host ID", x="Week") + theme_bw() +
theme(legend.position="bottom") +
scale_x_continuous(breaks=seq(from=0,to=90,by=5))
p_timeline

pdf('../figure_table/leukemia_timeline.pdf', width=7.7, height=5)
print(p_timeline)
dev.off()
## quartz_off_screen
## 2
TEMPTED
Run analysis
datlist_all <- format_tempted(count_all, meta_all$week, meta_all$hostID,
threshold=0.95, pseudo=0.5, transform='clr')
print(dim(datlist_all[[1]]))
## [1] 1065 10
svd_all <- svd_centralize(datlist_all, 1)
res_tempted_all <- tempted(svd_all$datlist, r = npc, resolution = 51, smooth=1e-4)
## Calculate the 1th Component
## Convergence reached at dif=1.29075362408583e-05, iter=3
## Calculate the 2th Component
## Convergence reached at dif=7.67492932283745e-05, iter=9
## Calculate the 3th Component
## Convergence reached at dif=3.20041217209923e-05, iter=7
save(svd_all, res_tempted_all, datlist_all,
file='result/res_leukemia_tempted.Rdata')
load('result/res_leukemia_tempted.Rdata')
res_tempted <- res_tempted_all
svd_tempted <- svd_all
meta_tab <- meta_all
count_tab <- count_all
datlist <- datlist_all
Plot TEMPTED time loading
p_time_tempted <- plot_time_loading(res_tempted, r=3) + scale_color_manual(values=col_microbe) +
labs(x='Week', y="Temporal Loading", color="Component") +
geom_line(linewidth=1.5) + theme_bw() +
theme(legend.position='bottom')
Plot TEMPTED subject loadings
A_hat <- metauni
rownames(A_hat) <- A_hat$hostID
table(rownames(A_hat)==rownames(res_tempted$A_hat))
##
## TRUE
## 44
A_hat <- cbind(res_tempted$A_hat[,1:3], A_hat)
A_hat$group <- factor(A_hat$group,
levels=c("healthy wild type", "predisposed, remained healthy", "predisposed, developed pB-ALL"))
p_sub_tempted <- plot_ly(A_hat, x=~PC1, y=~PC2, z=~PC3,
color=~group, colors=col_group)
p_sub_tempted <- p_sub_tempted %>% add_markers(marker=list(size=5))
p_sub_tempted <- p_sub_tempted %>% layout(scene=list(xaxis=list(title="Component 1", titlefont = list(size = 20)),
yaxis=list(title="Component 2", titlefont = list(size = 20)),
zaxis=list(title="Component 3", titlefont = list(size = 20))),
legend = list(font = list(size = 20), orientation = "h"))
p_sub_tempted
htmlwidgets::saveWidget(widget=p_sub_tempted,
file="../figure_table/leukemia_sub_tempted.html",
selfcontained=F)
p_sub23_tempted <- ggplot(data=A_hat, aes(x=PC2, y=PC3)) +
geom_point(aes(color=group)) +
scale_color_manual(values=col_group) +
theme_bw() +
labs(x="Component 2", y="Component3") +
theme(legend.position='bottom')
Plot TEMPTED trajectory of log ratio of top features
datlist_raw <- format_tempted(count_all, meta_all$week, meta_all$hostID,
threshold=0.95, transform='none')
ratio_feat <- ratio_feature(res_tempted, datlist_raw, pct=0.005)
## summed up, by individual subject
tab_feat_ratio <- ratio_feat$metafeature_ratio
colnames(tab_feat_ratio)[2] <- 'hostID'
tab_feat_ratio <- merge(tab_feat_ratio, metauni)
## summed up, by mean and sd
reshape_feat_ratio <- reshape(tab_feat_ratio,
idvar=c("hostID","timepoint") ,
v.names=c("value"), timevar="PC",
direction="wide")
CC <- grep("value", colnames(reshape_feat_ratio))
colnames(reshape_feat_ratio)[CC] <- paste("Component", 1:length(CC))
feature_mat_ratio <- reshape_feat_ratio[,c("Component 2", "Component 3")]
time_vec_ratio <- reshape_feat_ratio$timepoint
group_vec_ratio <- factor(reshape_feat_ratio$group,
levels=c("healthy wild type", "predisposed, remained healthy", "predisposed, developed pB-ALL"))
p_feat_ratio_summary <- plot_feature_summary(feature_mat_ratio,
time_vec_ratio,
group_vec_ratio, bws=bws, nrow=1) +
xlab('Week') + theme_bw() +
theme(legend.position='bottom') +
scale_color_manual(values=col_group) + scale_fill_manual(values=col_group)
Plot all results for main text
lay <- rbind(c(1,2,3,3), c(1,2,3,3), c(1,2,3,3), c(1,2,3,3), c(1,2,3,3),
c(4,5,5,5))
lgd1 <- get_legend(p_time_tempted)
lgd2 <- get_legend(p_sub23_tempted)
p_tempted_all <- grid.arrange(p_time_tempted+theme(legend.position="none"),
p_sub23_tempted+theme(legend.position="none"),
p_feat_ratio_summary+theme(legend.position="none"),
lgd1, lgd2,
layout_matrix=lay)

ggsave('../figure_table/leukemia_tempted.pdf', width=10, height=3, plot=p_tempted_all)
Plot top features associated with disease for supplementary
# the log ratio is constructed using the following features
ftnames <- rownames(datlist_raw[[1]])[-1]
tab_ftnames <- NULL
for (ii in 1:npc){
tab_ftnames <- rbind(tab_ftnames,
data.frame(sequence=ftnames[ratio_feat$toppct[,ii]],
Component=ii, rank="Top"))
tab_ftnames <- rbind(tab_ftnames,
data.frame(sequence=ftnames[ratio_feat$bottompct[,ii]],
Component=ii, rank="Bottom"))
}
taxtab <- read.csv("data/gg2-taxonomy.tsv", sep="\t", row.names=1)
tab_ftnames <- cbind(tab_ftnames, taxtab[tab_ftnames$sequence,])
tab_taxsplit <- t(matrix(unlist(lapply(tab_ftnames$Taxon, function(x){strsplit(x, "; ")[[1]]})), nrow=7))
colnames(tab_taxsplit) <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")
tab_ftnames <- cbind(tab_ftnames, tab_taxsplit)
shortname <- tab_taxsplit[, "Species"]
for (j in colnames(tab_taxsplit)[6:1]){
shortname[nchar(shortname)<=3] <- tab_taxsplit[nchar(shortname)<=3, j]
}
shortname <- tab_taxsplit[, "Species"]
for (j in colnames(tab_taxsplit)[6:1]){
shortname[nchar(shortname)<=3] <- tab_taxsplit[nchar(shortname)<=3, j]
}
for (ii in 1:(length(shortname)-1)){
ind <- grep(shortname[ii], shortname)
if (length(ind)>1) {
shortname[ind[-1]] <- paste0(shortname[ind[-1]], "_", 1+1:length(ind[-1]))
}
}
tab_ftnames$shortname <- shortname
write.csv(tab_ftnames, file="../figure_table/leukemia_topfeature.csv")
for (ii in 2:3){
tab_ftnames_pc <- dplyr::filter(tab_ftnames, Component==ii)
topfeat_pc <- tab_ftnames_pc$sequence
prop_all <- (count_all)/rowSums(count_all)
if (ii==2){
feat_mat_pc <- prop_all[meta_all$genotype!="WT",topfeat_pc]
meta_pc <- meta_all[meta_all$genotype!="WT",]
}
if (ii==3){
feat_mat_pc <- prop_all[,topfeat_pc]
meta_pc <- meta_all
}
nfeat <- ncol(feat_mat_pc)
tab_topfeat <- data.frame(RA=as.vector(as.matrix(feat_mat_pc)),
feature=rep(tab_ftnames_pc$shortname, each=nrow(feat_mat_pc)),
time=rep(meta_pc$week, nfeat),
group=rep(meta_pc[,ifelse(ii==2, "group", "genotype")], nfeat),
hostID=rep(meta_pc$hostID, nfeat))
p_topfeat_tempted <- ggplot(data=tab_topfeat) +
geom_point(aes(x=time, y=RA, color=group), size=0.8) +
facet_wrap(vars(feature), nrow=3) +
theme_bw() +
labs(x="Week", y="Relative Abundance", color=ifelse(ii==2, "Outcome", "Genotype")) +
ggtitle(paste0("Top Features Identified by Component ", ii)) + theme(legend.position="bottom") +
coord_trans(y="sqrt")
if (ii==2){
p_topfeat_tempted <- p_topfeat_tempted + geom_vline(xintercept = 35, linetype="dashed",
color = "grey", linewidth=1)
}
print(p_topfeat_tempted)
pdf(paste0("../figure_table/leukemia_topfeat", ii, ".pdf"), height=5, width=10)
print(p_topfeat_tempted)
dev.off()
}
## Warning: Removed 1 rows containing missing values (`geom_segment()`).
## Removed 1 rows containing missing values (`geom_segment()`).
## Removed 1 rows containing missing values (`geom_segment()`).
## Removed 1 rows containing missing values (`geom_segment()`).
## Removed 1 rows containing missing values (`geom_segment()`).
## Removed 1 rows containing missing values (`geom_segment()`).
## Removed 1 rows containing missing values (`geom_segment()`).
## Removed 1 rows containing missing values (`geom_segment()`).
## Removed 1 rows containing missing values (`geom_segment()`).
## Removed 1 rows containing missing values (`geom_segment()`).
## Removed 1 rows containing missing values (`geom_segment()`).
## Removed 1 rows containing missing values (`geom_segment()`).
## Removed 1 rows containing missing values (`geom_segment()`).
## Removed 1 rows containing missing values (`geom_segment()`).
## Removed 1 rows containing missing values (`geom_segment()`).
## Removed 1 rows containing missing values (`geom_segment()`).
## Removed 1 rows containing missing values (`geom_segment()`).
## Removed 1 rows containing missing values (`geom_segment()`).
## Removed 1 rows containing missing values (`geom_segment()`).
## Removed 1 rows containing missing values (`geom_segment()`).
## Removed 1 rows containing missing values (`geom_segment()`).
## Removed 1 rows containing missing values (`geom_segment()`).
## Removed 1 rows containing missing values (`geom_segment()`).
## Removed 1 rows containing missing values (`geom_segment()`).


CTF
Run analysis
py_run_file(file="analysis_leukemia_ctf.py",convert=F)
CTF subject loadings
ctf_sub <- read_qza("result/subject-biplot_leukemia.qza")$data$Vectors
ctf_sub$group <- metauni[ctf_sub$SampleID, "group"]
p_sub_ctf <- plot_ly(ctf_sub, x=~`PC1`, y=~`PC2`, z=~`PC3`,
color=~group, colors=col_group)
p_sub_ctf <- p_sub_ctf %>% add_markers(marker=list(size=5))
p_sub_ctf <- p_sub_ctf %>% layout(scene=list(xaxis=list(title="Component 1", titlefont = list(size = 20)),
yaxis=list(title="Component 2", titlefont = list(size = 20)),
zaxis=list(title="Component 3", titlefont = list(size = 20))),
legend = list(font = list(size = 20), orientation = "h"))
p_sub_ctf
htmlwidgets::saveWidget(widget=p_sub_ctf,
file="../figure_table/leukemia_sub_ctf.html",
selfcontained=F)
p_sub12_ctf <- ctf_sub %>% ggplot() +
geom_point(aes(x=PC1, y=PC2, color=group)) +
scale_color_manual(values=col_group) +
theme_bw() +
theme(legend.position="bottom")
p_sub13_ctf <- ctf_sub %>% ggplot() +
geom_point(aes(x=PC1, y=PC3, color=group)) +
scale_color_manual(values=col_group) +
theme_bw() +
theme(legend.position='bottom')
p_sub23_ctf <- ctf_sub %>% ggplot() +
geom_point(aes(x=PC2, y=PC3, color=group)) +
scale_color_manual(values=col_group) +
theme_bw() +
theme(legend.position='bottom')
g_legend<-function(a.gplot){
tmp <- ggplot_gtable(ggplot_build(a.gplot))
leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
legend <- tmp$grobs[[leg]]
return(legend)}
CTF trajectories
# function to read in trajectories
read_traj <- function(file){
tmp <- tempdir()
rm <- TRUE
unzip(file, exdir=tmp)
unpacked<-unzip(file, exdir=tmp, list=TRUE)
artifact<-read_yaml(paste0(tmp,"/", paste0(gsub("/..+","", unpacked$Name[1]),"/metadata.yaml")))
artifact$contents<-data.frame(files=unpacked)
artifact$contents$size=sapply(paste0(tmp, "/", artifact$contents$files), file.size)
artifact$version=read.table(paste0(tmp,"/",artifact$uuid, "/VERSION"))
artifact$data <- read_tsv(paste0(tmp,"/",artifact$uuid,"/data/trajectory.tsv"))
return(artifact)
}
ctf_traj <- as.data.frame(read_traj("result/state-subject-ordination_leukemia.qza")$data)
## Rows: 327 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): #SampleID, subject_id, group
## dbl (5): PC1, PC2, PC3, week_int, week
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ctf_traj$group <- meta_all[ctf_traj$`#SampleID`, "group"]
colnames(ctf_traj)[2:4] <- paste("Component", 1:3)
p_traj_ctf <- plot_feature_summary(ctf_traj[,c("Component 1","Component 2","Component 3")],
ctf_traj$week_int,
ctf_traj$group,
bws=bws) +
xlab('Week') + theme_bw() +
theme(legend.position='bottom') +
scale_color_manual(values=col_group) +
scale_fill_manual(values=col_group)
CTF time loadings
ctf_time <- read_qza("result/state-biplot_leukemia.qza")$data$Vectors
colnames(ctf_time)[1] <- "week_int"
tab_time <- data.frame(time=ctf_time$week_int, value=as.vector(as.matrix(ctf_time[,2:4])),
component=as.factor(as.vector(t(matrix(rep(1:3,nrow(ctf_time)),3,)))))
p_time_ctf <- tab_time %>% ggplot(aes(x=time, y=value, color=component)) +
geom_line(linewidth=1.5) +
scale_color_manual(values=col_microbe) +
labs(x='Week', y="Temporal Loading", color="Component") +
theme_bw() +
theme(legend.position='bottom')
Plot all results
lay <- rbind(c(1,2,3),c(1,2,3),c(1,2,3),c(1,2,3),c(1,2,3),
c(4,4,4),c(4,4,4),c(4,4,4),c(4,4,4),c(4,4,4),
c(5,5,5),
c(6,6,6))
lgd1 <- get_legend(p_time_ctf)
lgd2 <- get_legend(p_sub23_ctf)
p_ctf_all <- grid.arrange(
p_time_ctf+theme(legend.position="none"),
p_sub12_ctf+theme(legend.position="none"),
p_sub23_ctf+theme(legend.position="none"),
p_traj_ctf+theme(legend.position="none"),
lgd1, lgd2,
layout_matrix=lay)

ggsave("../figure_table/leukemia_ctf.pdf", width=8, height=5.5, plot=p_ctf_all)
microTensor
Run analysis
count_tab <- count_all[,colMeans(count_all==0)<=0.95]
meta_tab <- meta_all[,c("sample_name", "hostID", "week", "group", "genotype", "diseased")]
meta_tab$time_disc <- round(meta_tab$week)
tm <- sort(unique(meta_tab$time_disc))
metauni <- unique(meta_tab[,c("hostID", "group", "genotype", "diseased")])
rownames(metauni) <- metauni$hostID
X_array <- array(NA, dim = c(ncol(count_tab),
length(unique(meta_tab$hostID)),
length(tm)))
dimnames(X_array) <- list(colnames(count_tab),
unique(meta_tab$hostID),
tm)
for(k in 1:length(tm)) {
k_df_samples <- meta_tab %>%
dplyr::filter(time_disc == tm[k])
k_df_samples <- k_df_samples[!duplicated(k_df_samples$hostID),]
X_array[, k_df_samples$hostID, k] <-
t(count_tab[rownames(k_df_samples), ])
}
mean(is.na(X_array)) # check proportion of NAs
set.seed(1)
fit_microTensor <-
microTensor::microTensor(X = X_array, R = npc,
nn_t = TRUE, ortho_m = TRUE,
weighted = TRUE)
micro_sub <- as.data.frame(fit_microTensor$s)
colnames(micro_sub) <- paste0("PC",1:npc)
micro_sub$hostID <- dimnames(X_array)[[2]]
micro_sub <- merge(metauni, micro_sub)
write.csv(micro_sub, file="result/subject_microtensor_leukemia.csv")
micro_time <- as.data.frame(fit_microTensor$t)
colnames(micro_time) <- paste0("PC",1:3)
micro_time$time <- tm
write.csv(micro_time, file="result/time_microtensor_leukemia.csv")
micro_traj <- microTensor::create_loading(fit_decomp = fit_microTensor,
feature_names = dimnames(X_array)[[1]],
subject_names = dimnames(X_array)[[2]],
time_names = tm,
class = "sample")
colnames(micro_traj) <- c("hostID", "time_disc", paste0("PC",1:npc))
micro_traj <- merge(meta_tab[,c("sample_name", "hostID", "time_disc", "group", "genotype", "diseased")], micro_traj)
write.csv(micro_traj, file="result/trajectory_microtensor_leukemia.csv")
microTensor subject loading
micro_sub <- read.csv(file="result/subject_microtensor_leukemia.csv", row.names=1, header=T)
p_sub_micro <- plot_ly(micro_sub, x=~PC1, y=~PC2, z=~PC3,
color=~group, colors=col_group)
p_sub_micro <- p_sub_micro %>% add_markers(marker=list(size=5))
p_sub_micro <- p_sub_micro %>% layout(scene=list(xaxis=list(title="Component 1", titlefont = list(size = 20)),
yaxis=list(title="Component 2", titlefont = list(size = 20)),
zaxis=list(title="Component 3", titlefont = list(size = 20))),
legend = list(font = list(size = 20), orientation = "h"))
p_sub_micro
htmlwidgets::saveWidget(widget=p_sub_micro,
file="../figure_table/leukemia_sub_microtensor.html",
selfcontained=F)
p_sub12_micro <- micro_sub %>% ggplot() +
geom_point(aes(x=PC1, y=PC2, color=group)) +
scale_color_manual(values=col_group) +
theme_bw() +
theme(legend.position="bottom")
p_sub13_micro <- micro_sub %>% ggplot() +
geom_point(aes(x=PC1, y=PC3, color=group)) +
scale_color_manual(values=col_group) +
theme_bw() +
theme(legend.position='bottom')
p_sub23_micro <- micro_sub %>% ggplot() +
geom_point(aes(x=PC2, y=PC3, color=group)) +
scale_color_manual(values=col_group) +
theme_bw() +
theme(legend.position='bottom')
g_legend<-function(a.gplot){
tmp <- ggplot_gtable(ggplot_build(a.gplot))
leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
legend <- tmp$grobs[[leg]]
return(legend)}
microTensor trajectories
micro_traj <- read.csv(file="result/trajectory_microtensor_leukemia.csv", header=T, row.names=1)
colnames(micro_traj)[7:9] <- paste("Component", 1:3)
p_traj_micro <- plot_feature_summary(micro_traj[,c("Component 1","Component 2","Component 3")],
micro_traj$time_disc,
micro_traj$group,
bws=bws) +
xlab('Week') + theme_bw() +
theme(legend.position='bottom') +
scale_color_manual(values=col_group) +
scale_fill_manual(values=col_group)
microTensor time loading
micro_time <- read.csv("result/time_microtensor_leukemia.csv", header=T)
tab_micro_time <- tidyr::pivot_longer(micro_time,
cols = c(PC1, PC2, PC3),
names_to = "PC",
values_to = "value")
p_time_micro <- ggplot(data=tab_micro_time) +
geom_line(linewidth=1.5, aes(x=time, y=value, color=PC)) +
scale_color_manual(values=col_microbe) +
labs(x="Study Day", y="Time Loading", color="Component") +
theme_bw() +
theme(legend.position='bottom')
Plot all results
lay <- rbind(c(1,2,3),c(1,2,3),c(1,2,3),c(1,2,3),c(1,2,3),
c(4,4,4),c(4,4,4),c(4,4,4),c(4,4,4),c(4,4,4),
c(5,5,5),
c(6,6,6))
lgd1 <- get_legend(p_time_micro)
lgd2 <- get_legend(p_sub23_micro)
p_micro_all <- grid.arrange(
p_time_micro+theme(legend.position="none"),
p_sub12_micro+theme(legend.position="none"),
p_sub23_micro+theme(legend.position="none"),
p_traj_micro+theme(legend.position="none"),
lgd1, lgd2,
layout_matrix=lay)

ggsave("../figure_table/leukemia_microtensor.pdf", width=8, height=5.5, plot=p_micro_all)
PCoA
# get mds results
mds_mat_all <- NULL
for (i in 1:length(dm_list)){
dist_tab <- dm_list[[i]]
mds_all <- cmdscale(dist_tab, k=3)
mds_mat <- data.frame(hostID=meta_all[rownames(mds_all),'hostID'],
PC=mds_all)
mds_mat$timepoint <- meta_all[rownames(mds_all),'week']
mds_mat <- merge(mds_mat, metauni, by='hostID')
mds_mat$method <- names(dm_list)[i]
mds_mat_all <- rbind(mds_mat_all, mds_mat)
}
mds_mat_all$group <- factor(mds_mat_all$group,
levels=c("healthy wild type", "predisposed, remained healthy", "predisposed, developed pB-ALL"))
colnames(mds_mat_all)
## [1] "hostID" "PC.1" "PC.2" "PC.3" "timepoint" "genotype"
## [7] "diseased" "group" "method"
mds_mat_all$method <- as.factor(mds_mat_all$method)
levels(mds_mat_all$method)
## [1] "bray" "jaccard" "unifrac" "wunifrac"
levels(mds_mat_all$method) <- c("Bray-Curtis", "Jaccard", "UniFrac", "Weighted UniFrac")
names(dm_list)
## [1] "unifrac" "wunifrac" "bray" "jaccard"
names(dm_list) <- c("UniFrac", "Weighted UniFrac", "Bray-Curtis", "Jaccard")
Week 35-70 PERMANOVA for only Pax mice wrt disease
# other methods
mds_mat_Pax <- dplyr::filter(mds_mat_all, genotype!='WT')
# result from tempted
ratio_mat_Pax <- dplyr::filter(reshape_feat_ratio, genotype!='WT')
mth <- levels(mds_mat_all$method)
pval_permanova_late <- rep(NA, length(mth)+2)
names(pval_permanova_late) <- c(mth, "TEMPTED", "CTF")
Fval_permanova_late <- pval_permanova_late
for (j in 1:length(mth)){
tmp <- dplyr::filter(mds_mat_Pax,
timepoint>=35 & timepoint<=70 & method==mth[j])
res_adonis <- adonis2(tmp[,2:4] ~ diseased, method='euclidean',
data=tmp)
pval_permanova_late[j] <- res_adonis$`Pr(>F)`[1]
Fval_permanova_late[j] <- res_adonis$F[1]
}
# for TEMPTED
tmp <- dplyr::filter(ratio_mat_Pax, timepoint>=35 & timepoint<=70)
res_adonis <- adonis2(tmp[,6:8] ~ diseased, method='euclidean',
data=tmp)
pval_permanova_late[length(mth)+1] <- res_adonis$`Pr(>F)`[1]
Fval_permanova_late[length(mth)+1] <- res_adonis$F[1]
# for CTF
tmp <- dplyr::filter(ctf_traj, week_int>=35 & week_int<=70 & group!="healthy wild type")
res_adonis <- adonis2(tmp[,2:4] ~ tmp$group, method='euclidean')
pval_permanova_late[length(mth)+2] <- res_adonis$`Pr(>F)`[1]
Fval_permanova_late[length(mth)+2] <- res_adonis$F[1]
cbind(pval_permanova_late,
Fval_permanova_late)
## pval_permanova_late Fval_permanova_late
## Bray-Curtis 0.137 1.9960173
## Jaccard 0.147 1.9300087
## UniFrac 0.368 0.9555013
## Weighted UniFrac 0.820 0.1450785
## TEMPTED 0.002 6.9220221
## CTF 0.041 3.7071993
write.csv(cbind(pval_permanova_late,
Fval_permanova_late), file="../figure_table/leukemia_permanova.csv")
Trajectory plots for other methods and Wilcoxon rank test of Week
35-70 samples
group_level <- levels(mds_mat_all$group)
nmethod <- length(dm_list)
CI_length <- -qnorm((1-0.95)/2)
tab_summary_all <- NULL
pval_wilcox_late <- Wval_wilcox_late <- matrix(NA, nmethod+2, npc)
rownames(pval_wilcox_late) <- rownames(Wval_wilcox_late) <- c(names(dm_list), "TEMPTED", "CTF")
for (kk in 1:nmethod){
mthd <- names(dm_list)[kk]
time_all <- NULL
mean_all <- NULL
merr_all <- NULL
feature_all <- NULL
group_all <- NULL
ind_mthd <- mds_mat_all$method==mthd
feature_mat <- mds_mat_all[ind_mthd,c(1+1:npc)]
time_vec <- mds_mat_all$timepoint[ind_mthd]
group_vec <- mds_mat_all$group[ind_mthd]
# Wilcoxon test
for (jj in 1:ncol(feature_mat)){
sel <- time_vec>=35 & time_vec<=70 & group_vec!='healthy wild type'
table(group_vec[sel])
test_tmp <- wilcox.test(feature_mat[sel,jj]~group_vec[sel])
pval_wilcox_late[kk,jj] <- test_tmp$p.value
Wval_wilcox_late[kk,jj] <- test_tmp$statistic
}
for (jj in 1:ncol(feature_mat)){
for (ii in 1:length(group_level)){
ind <- group_vec==group_level[ii]
model.np <- npreg(feature_mat[ind,jj]~time_vec[ind], bws=bws,
regtyle="ll", bwmethod="cv.aic", gradients=T)
time_eval <- as.vector(t(model.np$eval))
mean_eval <- model.np$mean[order(time_eval)]
merr_eval <- model.np$merr[order(time_eval)]
time_eval <- sort(time_eval)
time_all <- c(time_all, time_eval)
mean_all <- c(mean_all, mean_eval)
merr_all <- c(merr_all, merr_eval)
feature_all <- c(feature_all,
rep(colnames(feature_mat)[jj], length(time_eval)))
group_all <- c(group_all,
rep(group_level[ii], length(time_eval)))
}
}
group_all <- factor(group_all, levels=group_level)
tab_summary <- data.frame(time=time_all, mean=mean_all, merr=merr_all,
group=group_all, feature=feature_all)
tab_summary$method <- names(dm_list)[kk]
tab_summary_all <- rbind(tab_summary_all, tab_summary)
}
colnames(pval_wilcox_late) <- colnames(Wval_wilcox_late) <- colnames(feature_mat)
pval_wilcox_late <- as.data.frame(pval_wilcox_late)
Wval_wilcox_late <- as.data.frame(Wval_wilcox_late)
# for TEMPTED
sel <- time_vec_ratio>=35 & time_vec_ratio<=70 &
group_vec_ratio!='healthy wild type'
wilcox_tempted <- wilcox.test(feature_mat_ratio$`Component 2`[sel]~
group_vec_ratio[sel])
pval_wilcox_late["TEMPTED","PC.2"] <- wilcox_tempted$p.value
Wval_wilcox_late["TEMPTED","PC.2"] <- wilcox_tempted$statistic
# for CTF
ctf_traj_late <- dplyr::filter(ctf_traj, week_int>=35 & week_int<=70 &
group!='healthy wild type')
wilcox_ctf <- wilcox.test(ctf_traj_late$`Component 1`~ctf_traj_late$group)
pval_wilcox_late["CTF","PC.1"] <- wilcox_ctf$p.value
Wval_wilcox_late["CTF","PC.1"] <- wilcox_ctf$statistic
tab_summary_all$method <- factor(tab_summary_all$method,
levels=unique(tab_summary_all$method))
tab_summary_all$feature <- factor(tab_summary_all$feature,
levels=unique(tab_summary_all$feature))
levels(tab_summary_all$feature) <- paste0("Component ", 1:3)
p_summary <- ggplot(data=tab_summary_all,
aes(x=time, y=mean, group=group, color=group)) +
geom_line() + theme_bw()+
geom_ribbon(aes(ymin=mean-CI_length*merr, ymax=mean+CI_length*merr,
color=group, fill=group), linetype=2, alpha=0.3) +
ylab(paste0('mean +/- ', round(CI_length,2), '*se')) + xlab("Week") +
facet_grid(feature~method) +
theme(legend.position="bottom") +
scale_color_manual(values=col_group) +
scale_fill_manual(values=col_group) +
scale_x_continuous(breaks=c(0,25,50,75,100))
p_summary

pdf("../figure_table/leukemia_mds.pdf", width=8, height=5)
print(p_summary)
dev.off()
## quartz_off_screen
## 2
# Wilcoxon rank test
pval_wilcox_late
## PC.1 PC.2 PC.3
## UniFrac 0.135431301 0.1310596149 0.8488535
## Weighted UniFrac 0.672460669 0.6130066586 0.7591171
## Bray-Curtis 0.006876426 0.6130066586 0.4910435
## Jaccard 0.005848879 0.5229909514 0.4303680
## TEMPTED NA 0.0000156904 NA
## CTF 0.029485574 NA NA
colnames(pval_wilcox_late) <- paste("Component", 1:3)
write.csv(pval_wilcox_late, '../figure_table/leukemia_wilcox_pcoa.csv')